% Compute a0, a1, a2 for the Critical value function
% This provides a Matlab version of the program used to compute the coefficients a0, a1, a2 for the critical value function.
% In the paper we use a Fortran to compute these coefficients for the various tests in the paper.
% We provide this Matlab program for illustrative purposes. It computes the coefficients for the test that mu=sigma/xi in the returns example.
% In the replication notes these are coefficients are shown in cvx_test3_1_194.csv
% Note that coefficients produced here differ slightly from those in cvx_test3_1_194.csv because of the random number generation.
%

clear all;
small = 1.0e-10;
pinv_tol = 1.0e-05;
big = 1.0e+8;
global datadir;
rng(1234);

% -- File Directories   
outdir = 'out/';
figdir = 'fig/';
matdir = 'mat/';

% Add new directory to path to find functions
addpath('../Matlab_Functions/');

% In this example I use parallel computing to speed up the computation. Comment out the following line if you do not have the parallel computing toolbox.
% Shut down current pool if it exists
delete(gcp('nocreate'));
% Start a new pool with 8 workers
parpool("Processes",8);

% Set options for optimizers
options_fminunc = optimset('Display','off','TolFun',1.0e-6);
options_fminsearch = optimset('Display','off','TolFun',1.0e-6);
options_fzero = optimset('Display','off');

% In this example the parameters are constructed for the k largest exceedances with k = 1 and T = 194. (These are parameters for the returns data.)
% We carry out the test for the null with mu = sigma/xi
k = 1;
T = 194;

% Parameters for simulations as described in Appendix B of the paper 
N = 10000;  % Number of replications
xi_min = 0.03;
xi_max = 1.5;
n_xi_grid = 10;
xi_grid = linspace(xi_min,xi_max,n_xi_grid);
linex_parm = 12;
h_scale = 0.3;     % Scale factor for h
h_pct = [93 97];   % Percentiles for computing h (the bandwidth)
sigma = 1;         % Value of Sigma in the GEV distribution .. this is a scale parameter .. we set it to 1
test_size = 0.05;  % Size of the test


% Step 1:  Generate the data and compute the MLEs and LR statistics for each value of xi

% Matrices to store xi_mle and LR Statistic
xi_mle_mat = NaN(N,n_xi_grid);
LR_stat_mat = NaN(N,n_xi_grid);

% Generate Exponential Random Variables. These are held fixed across the value of xi
E_mat = exprnd(1,k,T,N);
CumSum_E_mat = cumsum(E_mat,1);  % Cumulation sum in k dimension  (This is irrelevant for k=1, but included for generality for k>1 applications)
for i_grid = 1:n_xi_grid
    tic
    fprintf('xi value %d of %d\n',i_grid,n_xi_grid);
    xi = xi_grid(i_grid);
    mu = sigma/xi;                % Value of mu under the null
    z_mat = (CumSum_E_mat.^(-xi)-1)/xi;    % GEV draws with sigma = 1 and mu = 0
    y_mat = sigma*z_mat+mu;                  % GEV draws with sigma and mu
    x_start_unc = [xi sigma mu];             % Starting Values for optimizers -- use true values (unconstrained MLE)
    x_start_con = [xi sigma];                % Starting Values for optimizers -- use true values (constrained MLE)
    parfor i_N = 1:N
    %   for i_N = 1:N                  % Use FOR loop in place of PARFOR if parallel computing not used 
        y = squeeze(y_mat(:,:,i_N));  % k x T matrix of extremes
        Y = y';                       % Y is a T x k matrix of extremes
        % Find Unconstrained MLE of (xi,sigma,mu) for the k largest values ... use two methods to make sure min is found
        f_unc = @(x) minus_llf_gev(x,Y);
        [x_1,fval_1]=fminunc(f_unc,x_start_unc,options_fminunc); 
        [x_2,fval_2] = fminsearch(f_unc,x_start_unc,options_fminsearch); 
        if fval_1 < fval_2
             x_mle = x_1;
             fval = fval_1;
         else
            x_mle = x_2;
            fval = fval_2;
        end
        llf_unconstrained = -fval;
        xi_mle = x_mle(1); 
        % Construct constrained MLE ... use two methods to make sure min is found
        f_con = @(x) minus_llf_gev_con_mu(x,Y);
        [x_1,fval_1] = fminunc(f_con,x_start_con,options_fminunc); 
        [x_2,fval_2] = fminsearch(f_con,x_start_con,options_fminsearch); 
        if fval_1 < fval_2
            fval = fval_1;
        else
            fval = fval_2;
        end
        llf_constrained_mu = -fval;
        llf_dif = llf_unconstrained-llf_constrained_mu;

        % Save Results
        xi_mle_mat(i_N,i_grid) = xi_mle;
        LR_stat_mat(i_N,i_grid) = llf_dif;
        
    end
    toc
end

% Step 2: Use these results to compute the adjustment factor so that the critical value is 1.0

% Find the value of h (the bandwidth) from Appendix B.1
pct = prctile(LR_stat_mat(:,5),h_pct);
h = h_scale*(pct(2)-pct(1));

% Set up minimization problem
a_start = zeros(3,1); % Starting values for a0, a1, a2
% Set up minimization problem
f_cvadj = @(a) linex_cvadj(a,xi_mle_mat,LR_stat_mat,h,linex_parm,test_size);
[a_1,fval_1] = fminunc(f_cvadj,a_start,options_fminunc); 
[a_2,fval_2] = fminsearch(f_cvadj,a_start,options_fminsearch);
a = a_1;
if fval_2 < fval_1
    a = a_2;
end

% Adjust a(1) so that size is (slightly) less than target_size
a0 = a(1);
f_size_constraint = @(a0) size_constraint(a0,a,xi_mle_mat,LR_stat_mat,test_size);
a0_new = fzero(f_size_constraint,a0,options_fzero);
a(1) = a0_new;

% Compute the size from the simulated data
adj = exp(a(1)+a(2)*xi_mle_mat+a(3)*xi_mle_mat.^2);
adj_LR_stat = adj.*LR_stat_mat;
actual_size = max(mean(adj.*LR_stat_mat>1));

% Summarize the results 
fprintf('\n \n Summary of Results for LR adjustment function \n');
fprintf('a0 = %f, a1 = %f, a2 = %f\n',a(1),a(2),a(3));
fprintf('Actual Size = %f, Target Size = %f\n',actual_size,test_size);


% -------------------------- Functions --------------------------
function f = linex_cvadj(a,xi_mle_mat,LR_stat_mat,h,linex_parm,test_size)
   adj = exp(a(1)+a(2)*xi_mle_mat+a(3)*xi_mle_mat.^2);
   adj_LR_stat = adj.*LR_stat_mat;
   y = (1-adj_LR_stat)/h;
   norm_y = normcdf(y);
   mean_norm_y = mean(norm_y);
   z = logit(mean_norm_y) - logit((1-test_size));
   linex_z = linex(-linex_parm*z);
   f = sum(linex_z);
end

function f = linex(x);
    f = exp(x)-x-1;
end

function szd = size_constraint(a0,a,xi_mle_mat,LR_stat_mat,test_size)
    a(1) = a0;
    target_size = test_size - 0.001;   % Slightly less than desired size
    adj = exp(a(1)+a(2)*xi_mle_mat+a(3)*xi_mle_mat.^2);
    sz = max(mean(adj.*LR_stat_mat>1));
    szd = sz-target_size;
end

function f = logit(x)
    f = log(x./(1-x));
end

